import json

import scrapy

from ScrapyObject.spiders.utils.url_utils import *

'''
Requires a proxy/VPN to reach the target site.
Usage: scrapy crawl langyou -o langyou.json
Target site: https://951yu.com/
'''


class LangyouSpider(scrapy.Spider):
    """Crawl 951yu.com, yielding one video item per page that exposes an m3u8 source.

    Every page is scanned for an ``source: ... .m3u8`` player entry. When one is
    found, the page's JSON-LD metadata supplies the title, poster image and
    author name for the item. Site-relative links found on the page are then
    followed recursively, except for the two excluded sections.
    """

    # URL scheme prefix
    prefix = 'https://'
    # Site name (middle part of the host)
    website = '951yu'
    # Top-level domain plus trailing slash
    suffix = '.com/'
    name = 'langyou'
    allowed_domains = [website + '.com']
    start_urls = ['https://951yu.com/951yu-movie/']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and its running item counter.

        Positional and keyword arguments are forwarded to ``scrapy.Spider`` so
        that spider arguments (``scrapy crawl langyou -a key=value``) work.
        """
        super(LangyouSpider, self).__init__(*args, **kwargs)
        # Sequential id assigned to each yielded video item.
        self.i = 0

    def parse(self, response):
        """Yield a video item for this page (if any) and requests for its links.

        Args:
            response: The Scrapy response for the fetched page.

        Yields:
            The object built by ``get_video_item`` when the page contains an
            m3u8 source and usable JSON-LD metadata, followed by a
            ``scrapy.Request`` for every followed site-relative link.
        """
        # Page content as a string (per the helper's use in the original code).
        content = get_data(response)
        video_url = re.findall(r'source:.*?\.m3u8', content, re.IGNORECASE)
        if video_url:
            item = self._build_video_item(response, video_url[0])
            if item is not None:
                yield item

        # Follow links even when the item could not be built, so one malformed
        # page does not cut off the crawl of everything it links to.
        base_url = self.prefix + self.website + self.suffix
        for url in get_url(content):
            # Skip the two sections that are excluded from the crawl.
            if "/951yu-xiaoshuoqu/" in url or "/951yu-tupianqu/" in url:
                continue
            # Only site-relative links are followed.
            if url.startswith('/'):
                yield scrapy.Request(split_joint(base_url, url), callback=self.parse)

    def _build_video_item(self, response, source_match):
        """Build the video item for a page, or return None if metadata is unusable.

        Args:
            response: The Scrapy response whose JSON-LD script is read.
            source_match: The text matched by the ``source: ... .m3u8`` regex.

        Returns:
            The result of ``get_video_item``, or ``None`` when the JSON-LD
            block is missing, is not valid JSON, or lacks the expected fields.
        """
        json_str = response.xpath("//script[@type='application/ld+json']/text()").extract()
        try:
            dict_data = json.loads(json_str[0])
            name = dict_data['name']
            # NOTE(review): 'https:' is prepended, so the image value is
            # presumably a protocol-relative URL ("//host/...") - confirm.
            p_url = 'https:' + dict_data['image']
            tags = dict_data['author']['name']
        except (IndexError, KeyError, TypeError, ValueError) as err:
            # IndexError: no JSON-LD script; ValueError: invalid JSON;
            # KeyError/TypeError: metadata does not have the expected shape.
            self.logger.warning(
                "Skipping video item on %s: unusable JSON-LD metadata (%r)",
                response.url, err)
            return None

        # NOTE(review): the first 9 characters of the match are dropped; this
        # presumably strips a prefix such as "source: '" and leaves a
        # protocol-relative URL - confirm against the page markup.
        v_url = 'https:' + source_match[9:]

        # Only count pages that actually produce an item.
        self.i = self.i + 1
        return get_video_item(id=self.i, tags=tags, url="", name=name, pUrl=p_url, vUrl=v_url)